#############################################
# Simulation II
# heterogeneous datasets
#############################################

# simulate data
set.seed(12345)
x <- rnorm(200,0,1)
betas <- x^2
ystar <- betas * x 
pr <- exp(ystar)/(1 + exp(ystar))
y <- rbinom(200,1,pr)
simdat1 <- as.data.frame(cbind(y,x))
# logit estimation
logitmodel <- glm(y~x,simdat1,family=binomial(link="logit"))
simdat1_new <- simdat1
simdat1_new$x <- seq(-2,2,length.out = 200)
# logit prediction
pred_logit <- predict(logitmodel,simdat1_new,type="response",se.fit = T)
pred_mean <- pred_logit$fit
pred_u <- pred_logit$fit + 1.96 * pred_logit$se.fit
pred_l <- pred_logit$fit - 1.96 * pred_logit$se.fit
pred_y <- ifelse(pred_mean > 0.5,1,0)
# logit performance
pred_logit_rate <- length(which(pred_y==y))/200 # 40.5%

##############
# CBQ: q1 - q9
##############
qmodel1 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 1)

qmodel2 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 2)

qmodel3 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 3)

qmodel4 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 4)

qmodel5 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 5)

qmodel6 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 6)

qmodel7 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 7)

qmodel8 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 8)


qmodel9 <- cbq(N = length(y),
               n_covariate = 1,
               X = as.matrix(x,nrow=length(y)),
               Y = y,
               niter = 1000,
               qtl = 9)

# extract parameters
coef1 <- summary(qmodel1)
coef1 <- coef1$summary
coef2 <- summary(qmodel2)
coef2 <- coef2$summary
coef3 <- summary(qmodel3)
coef3 <- coef3$summary
coef4 <- summary(qmodel4)
coef4 <- coef4$summary
coef5 <- summary(qmodel5)
coef5 <- coef5$summary
coef6 <- summary(qmodel6)
coef6 <- coef6$summary
coef7 <- summary(qmodel7)
coef7 <- coef7$summary
coef8 <- summary(qmodel8)
coef8 <- coef8$summary
coef9 <- summary(qmodel9)
coef9 <- coef9$summary

coef_mean <- cbind(coef1[1:2,1],
                   coef2[1:2,1],
                   coef3[1:2,1],
                   coef4[1:2,1],
                   coef5[1:2,1],
                   coef6[1:2,1],
                   coef7[1:2,1],
                   coef8[1:2,1],
                   coef9[1:2,1])

cbq_sd2 <- apply(coef_mean,1,sd)

coef_25 <- cbind(coef1[1:2,4],
                 coef2[1:2,4],
                 coef3[1:2,4],
                 coef4[1:2,4],
                 coef5[1:2,4],
                 coef6[1:2,4],
                 coef7[1:2,4],
                 coef8[1:2,4],
                 coef9[1:2,4])

coef_975 <- cbind(coef1[1:2,8],
                  coef2[1:2,8],
                  coef3[1:2,8],
                  coef4[1:2,8],
                  coef5[1:2,8],
                  coef6[1:2,8],
                  coef7[1:2,8],
                  coef8[1:2,8],
                  coef9[1:2,8])


xb1 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,1],nrow=2)
xb2 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,2],nrow=2)
xb3 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,3],nrow=2)
xb4 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,4],nrow=2)
xb5 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,5],nrow=2)
xb6 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,6],nrow=2)
xb7 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,7],nrow=2)
xb8 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,8],nrow=2)
xb9 <-  cbind(rep(1,200),simdat1_new$x)  %*% as.matrix(coef_mean[,9],nrow=2)

qprob1 <- pald(xb1,0.1)
qprob2 <- pald(xb2,0.2)
qprob3 <- pald(xb3,0.3)
qprob4 <- pald(xb4,0.4)
qprob5 <- pald(xb5,0.5)
qprob6 <- pald(xb6,0.6)
qprob7 <- pald(xb7,0.7)
qprob8 <- pald(xb8,0.8)
qprob9 <- pald(xb9,0.9)

qpred_y1 <- ifelse(qprob1>0.5,1,0)
qpred_y2 <- ifelse(qprob2>0.5,1,0)
qpred_y3 <- ifelse(qprob3>0.5,1,0)
qpred_y4 <- ifelse(qprob4>0.5,1,0)
qpred_y5 <- ifelse(qprob5>0.5,1,0)
qpred_y6 <- ifelse(qprob6>0.5,1,0)
qpred_y7 <- ifelse(qprob7>0.5,1,0)
qpred_y8 <- ifelse(qprob8>0.5,1,0)
qpred_y9 <- ifelse(qprob9>0.5,1,0)

pred_rate1 <- length(which(qpred_y1==y))/200
pred_rate2 <- length(which(qpred_y2==y))/200
pred_rate3 <- length(which(qpred_y3==y))/200
pred_rate4 <- length(which(qpred_y4==y))/200
pred_rate5 <- length(which(qpred_y5==y))/200
pred_rate6 <- length(which(qpred_y6==y))/200
pred_rate7 <- length(which(qpred_y7==y))/200
pred_rate8 <- length(which(qpred_y8==y))/200
pred_rate9 <- length(which(qpred_y9==y))/200
pred_rates <- c(pred_rate1,
                pred_rate2,
                pred_rate3,
                pred_rate4,
                pred_rate5,
                pred_rate6,
                pred_rate7,
                pred_rate8,
                pred_rate9
)

pred_rate_heter <- c(pred_logit_rate,pred_rates)

# produce plots
pdf("figures/figure2_2.pdf",width = 7,height = 7)
plot(NA,xlim=c(-2,2),
     ylim=c(0,1),
     xlab = "X",
     ylab = "Predicted probability")
lines(seq(-2,2,length.out = 200), qprob1,col="gray" )
lines(seq(-2,2,length.out = 200), qprob2,col="gray" )
lines(seq(-2,2,length.out = 200), qprob3,col="gray" )
lines(seq(-2,2,length.out = 200), qprob4,col="gray" )
lines(seq(-2,2,length.out = 200), qprob5,col="gray" )
lines(seq(-2,2,length.out = 200), qprob6,col="gray" )
lines(seq(-2,2,length.out = 200), qprob7,col="gray" )
lines(seq(-2,2,length.out = 200), qprob8,col="gray" )
lines(seq(-2,2,length.out = 200), qprob9,col="gray" )
points(seq(-2,2,length.out = 200),pred_mean,pch=18)
abline(v=c(-1,1),lty="dashed")
legend("topleft",legend="Heterogeneous dataset")
legend("bottomright",legend=c("Ordinary logit","CBQ"),
       lty = c(NA,"solid"),
       pch = c(18,NA),
       bty="n")
dev.off()

